{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Dumping model to file cache c:\\users\\zhuwen~1\\appdata\\local\\temp\\jieba.cache\n",
      "Loading model cost 3.453 seconds.\n",
      "Prefix dict has been built succesfully.\n"
     ]
    }
   ],
   "source": [
    "# -*- coding:utf-8 -*-\n",
    "# 分词\n",
    "import pandas as pd\n",
    "import jieba # 导入结巴分词包\n",
    "\n",
    "inputfile1 = u'3_1my_meidi_jd_process_end_负面情感结果.txt' \n",
    "inputfile2 = u'3_2my_meidi_jd_process_end_正面情感结果.txt'\n",
    "data1 = pd.read_csv(inputfile1, encoding = 'utf-8', header = None) #(***)\n",
    "data2 = pd.read_csv(inputfile2, encoding = 'utf-8', header = None) #(***)\n",
    "\n",
    "mycut = lambda s: \" \".join(jieba.cut(s)) # 自定义简单分词函数\n",
    "data1 = data1[0].apply(mycut) # 通过广播形式分词，加快速度\n",
    "data2 = data2[0].apply(mycut) # 通过广播形式分词，加快速度\n",
    "\n",
    "data1.to_csv(u'4_1my_meidi_jd_process_end_负面情感结果_cut.txt', header = False, index = False, encoding='utf-8') # (***)\n",
    "data2.to_csv(u'4_2my_meidi_jd_process_end_正面情感结果_cut.txt', header = False, index = False, encoding='utf-8') # (***)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
